{-
    BNF Converter: Happy Generator
    Copyright (C) 2004  Author:  Markus Forberg, Aarne Ranta

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
-}

{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}

module BNFC.Backend.Haskell.CFtoHappy (cf2Happy, convert) where

import Prelude hiding ((<>))

import Data.List (intersperse)

import BNFC.CF
import BNFC.Backend.Common.StrUtils (escapeChars)
import BNFC.Backend.Haskell.Utils
import BNFC.Options (HappyMode(..), TokenText(..))
import BNFC.PrettyPrint
import BNFC.Utils

-- Type declarations

type Rules       = [(NonTerminal,[(Pattern,Action)])]
type Pattern     = String
type Action      = String
type MetaVar     = String

-- default naming

tokenName   = "Token"

-- | Generate a happy parser file from a grammar.

cf2Happy
  :: ModuleName -- ^ This module's name.
  -> ModuleName -- ^ Abstract syntax module name.
  -> ModuleName -- ^ Lexer module name.
  -> HappyMode  -- ^ Happy mode.
  -> TokenText  -- ^ Use @ByteString@ or @Text@?
  -> Bool       -- ^ AST is a functor?
  -> CF         -- ^ Grammar.
  -> String     -- ^ Generated code.
cf2Happy name absName lexName mode tokenText functor cf = unlines
  [ header name absName lexName tokenText
  , render $ declarations mode (allEntryPoints cf)
  , render $ tokens cf
  , delimiter
  , specialRules absName tokenText cf
  , render $ prRules absName functor (rulesForHappy absName functor cf)
  , footer
  ]

-- | Construct the header.
header :: ModuleName -> ModuleName -> ModuleName -> TokenText -> String
header modName absName lexName tokenText = unlines $ concat
  [ [ "-- This Happy file was machine-generated by the BNF converter"
    , "{"
    , "{-# OPTIONS_GHC -fno-warn-incomplete-patterns -fno-warn-overlapping-patterns #-}"
    , "module " ++ modName ++ " where"
    , "import qualified " ++ absName
    , "import " ++ lexName
    ]
  , tokenTextImport tokenText
  , [ "}"
    ]
  ]

-- | The declarations of a happy file.
-- >>> declarations Standard [Cat "A", Cat "B", ListCat (Cat "B")]
-- %name pA A
-- %name pB B
-- %name pListB ListB
-- -- no lexer declaration
-- %monad { Either String } { (>>=) } { return }
-- %tokentype {Token}
declarations :: HappyMode -> [Cat] -> Doc
declarations mode ns = vcat
    [ vcat $ map generateP ns
    , case mode of
        Standard -> "-- no lexer declaration"
        GLR      -> "%lexer { myLexer } { Either String _ }",
      "%monad { Either String } { (>>=) } { return }",
      "%tokentype" <+> braces (text tokenName)
    ]
  where
  generateP n = "%name" <+> parserName n <+> text (identCat n)

-- The useless delimiter symbol.
delimiter :: String
delimiter = "\n%%\n"

-- | Generate the list of tokens and their identifiers.
tokens :: CF -> Doc
tokens cf
  -- Andreas, 2019-01-02: "%token" followed by nothing is a Happy parse error.
  -- Thus, if we have no tokens, do not output anything.
  | null ts   = empty
  | otherwise = "%token" $$ (nest 2 $ vcat ts)
  where
    ts            = map prToken (cfTokens cf) ++ map text (specialToks cf)
    prToken (t,k) = hsep [ convert t, lbrace, text ("PT _ (TS _ " ++ show k ++ ")"), rbrace ]

-- Happy doesn't allow characters such as åäö to occur in the happy file. This
-- is however not a restriction, just a naming paradigm in the happy source file.
convert :: String -> Doc
convert = quotes . text . escapeChars

rulesForHappy :: ModuleName -> Bool -> CF -> Rules
rulesForHappy absM functor cf = for (ruleGroups cf) $ \ (cat, rules) ->
  (cat, map (constructRule absM functor) rules)

-- | For every non-terminal, we construct a set of rules. A rule is a sequence
-- of terminals and non-terminals, and an action to be performed.
--
-- >>> constructRule "Foo" False (npRule "EPlus" (Cat "Exp") [Left (Cat "Exp"), Right "+", Left (Cat "Exp")] Parsable)
-- ("Exp '+' Exp","Foo.EPlus $1 $3")
--
-- If we're using functors, it adds void value:
--
-- >>> constructRule "Foo" True (npRule "EPlus" (Cat "Exp") [Left (Cat "Exp"), Right "+", Left (Cat "Exp")] Parsable)
-- ("Exp '+' Exp","Foo.EPlus () $1 $3")
--
-- List constructors should not be prefixed by the abstract module name:
--
-- >>> constructRule "Foo" False (npRule "(:)" (ListCat (Cat "A")) [Left (Cat "A"), Right",", Left (ListCat (Cat "A"))] Parsable)
-- ("A ',' ListA","(:) $1 $3")
--
-- >>> constructRule "Foo" False (npRule "(:[])" (ListCat (Cat "A")) [Left (Cat "A")] Parsable)
-- ("A","(:[]) $1")
--
-- Coercion are much simpler:
--
-- >>> constructRule "Foo" True (npRule "_" (Cat "Exp") [Right "(", Left (Cat "Exp"), Right ")"] Parsable)
-- ("'(' Exp ')'","$2")
--
constructRule :: IsFun f => String -> Bool -> Rul f -> (Pattern, Action)
constructRule absName functor (Rule fun0 _cat rhs Parsable) = (pattern, action)
  where
    fun = funName fun0
    (pattern, metavars) = generatePatterns rhs
    action | isCoercion fun                 = unwords metavars
           | isNilCons fun                  = unwords (qualify fun : metavars)
           | functor                        = unwords (qualify fun : "()" : metavars)
           | otherwise                      = unwords (qualify fun : metavars)
    qualify f
      | isConsFun f || isNilCons f = f
      | isDefinedRule f = absName ++ "." ++ mkDefName f
      | otherwise       = absName ++ "." ++ f
constructRule _ _ (Rule _ _ _ Internal) = undefined -- impossible


-- | Generate patterns and a set of metavariables (de Bruijn indices) indicating
--   where in the pattern the non-terminal are locate.
--
-- >>> generatePatterns [ Left (Cat "Exp"), Right "+", Left (Cat "Exp") ]
-- ("Exp '+' Exp",["$1","$3"])
--
generatePatterns :: SentForm -> (Pattern, [MetaVar])
generatePatterns []  = ("{- empty -}", [])
generatePatterns its =
  ( unwords $ for its $ either {-non-term:-} identCat {-term:-} (render . convert)
  , [ ('$' : show i) | (i, Left{}) <- zip [1 :: Int ..] its ]
  )

-- We have now constructed the patterns and actions,
-- so the only thing left is to merge them into one string.

-- |
-- >>> prRules "Foo" False [(Cat "Expr", [("Integer", "Foo.EInt $1"), ("Expr '+' Expr", "Foo.EPlus $1 $3")])]
-- Expr :: { Foo.Expr }
-- Expr : Integer { Foo.EInt $1 } | Expr '+' Expr { Foo.EPlus $1 $3 }
--
-- if there's a lot of cases, print on several lines:
-- >>> prRules "" False [(Cat "Expr", [("Abcd", "Action"), ("P2", "A2"), ("P3", "A3"), ("P4", "A4"), ("P5","A5")])]
-- Expr :: { Expr }
-- Expr : Abcd { Action }
--      | P2 { A2 }
--      | P3 { A3 }
--      | P4 { A4 }
--      | P5 { A5 }
--
-- >>> prRules "" False [(Cat "Internal", [])] -- nt has only internal use
-- <BLANKLINE>
--
-- The functor case:
-- >>> prRules "" True [(Cat "Expr", [("Integer", "EInt () $1"), ("Expr '+' Expr", "EPlus () $1 $3")])]
-- Expr :: { (Expr ()) }
-- Expr : Integer { EInt () $1 } | Expr '+' Expr { EPlus () $1 $3 }
--
-- A list with coercion: in the type signature we need to get rid of the
-- coercion.
--
-- >>> prRules "" True [(ListCat (CoercCat "Exp" 2), [("Exp2", "(:[]) $1"), ("Exp2 ',' ListExp2","(:) $1 $3")])]
-- ListExp2 :: { [Exp ()] }
-- ListExp2 : Exp2 { (:[]) $1 } | Exp2 ',' ListExp2 { (:) $1 $3 }
--
prRules :: ModuleName -> Bool -> Rules -> Doc
prRules absM functor = vsep . map prOne
  where
    prOne (_ , []      ) = empty -- nt has only internal use
    prOne (nt, (p,a):ls) =
        hsep [ nt', "::", "{", type' nt, "}" ]
        $$ nt' <+> sep (pr ":" (p, a) : map (pr "|") ls)
      where
        nt' = text (identCat nt)
        pr pre (p,a) = hsep [pre, text p, "{", text a , "}"]
    type' = catToType qualify $ if functor then "()" else empty
    qualify
      | null absM = id
      | otherwise = ((text absM <> ".") <>)

-- Finally, some haskell code.

footer :: String
footer = unlines $
    [ "{"
    , ""
    , "happyError :: [" ++ tokenName ++ "] -> Either String a"
    , "happyError ts = Left $"
    , "  \"syntax error at \" ++ tokenPos ts ++ "
    , "  case ts of"
    , "    []      -> []"
    , "    [Err _] -> \" due to lexer error\""
    , unwords
      [ "    t:_     -> \" before `\" ++"
      , "(prToken t)"
      -- , tokenTextUnpack tokenText "(prToken t)"
      , "++ \"'\""
      ]
    , ""
    , "myLexer = tokens"
    , "}"
    ]

-- | GF literals.
specialToks :: CF -> [String]
specialToks cf = (`map` literals cf) $ \case
  "Ident"   -> "L_Ident  { PT _ (TV $$) }"
  "String"  -> "L_quoted { PT _ (TL $$) }"
  "Integer" -> "L_integ  { PT _ (TI $$) }"
  "Double"  -> "L_doubl  { PT _ (TD $$) }"
  "Char"    -> "L_charac { PT _ (TC $$) }"
  own       -> "L_" ++ own ++ " { PT _ (T_" ++ own ++ " " ++ posn ++ ") }"
    where posn = if isPositionCat cf own then "_" else "$$"

specialRules :: ModuleName -> TokenText -> CF -> String
specialRules absName tokenText cf = unlines . intersperse "" . (`map` literals cf) $ \case
    -- "Ident"   -> "Ident   :: { Ident }"
    --         ++++ "Ident    : L_ident  { Ident $1 }"
    "String"  -> "String  :: { String }"
            ++++ "String   : L_quoted { " ++ stringUnpack "$1" ++ " }"
    "Integer" -> "Integer :: { Integer }"
            ++++ "Integer  : L_integ  { (read (" ++ stringUnpack "$1" ++ ")) :: Integer }"
    "Double"  -> "Double  :: { Double }"
            ++++ "Double   : L_doubl  { (read (" ++ stringUnpack "$1" ++ ")) :: Double }"
    "Char"    -> "Char    :: { Char }"
            ++++ "Char     : L_charac { (read (" ++ stringUnpack "$1" ++ ")) :: Char }"
    own       -> own ++ " :: { " ++ qualify own ++ "}"
            ++++ own ++ "  : L_" ++ own ++ " { " ++ qualify own ++ posn ++ " }"
      where posn = if isPositionCat cf own then " (mkPosToken $1)" else " $1"
  where
    stringUnpack = tokenTextUnpack tokenText
    qualify
      | null absName = id
      | otherwise    = ((absName ++ ".") ++)
